library(tidyverse)
library(rio)
library(cregg)
library(patchwork)

# set replication folder as working directory
setwd("~replication")

load("data_genderedcost_long.rdata")

'%!in%' <- function(x,y)!('%in%'(x,y))

# only include completed answers - and answers given before deadline
# 2021-12-20 21:49:52 was the last response within the time frame
df_long <- df_long %>% 
  filter(SurveyStatus==2)

df_long <- df_long %>% 
  filter(SurveyEndTime<="2021-12-20 21:49:52")


#### INATTENTIVE RESPONDENTS ####

choices_inattentive <- df_long %>% filter(inattentive==1)

#unique candidates
unique(choices_inattentive$id)
# 638 candidates make SOME inattentive choice


## create df that remove candidates who make any inattentive choice
df_long_att <- df_long %>%
  filter(id %!in% choices_inattentive$id)

### Marginal means for attentive men
df_men_att <- df_long_att %>% 
  dplyr::filter(sex=="Man") %>% filter(dominant_exposure==0)

mm_men_att <- cregg::mm(df_men_att, # data
                        choice~position + remuneration + workload + work_environment, # formula
                        id = id)

mm_men_att$sex <- "Men - Attentive"

### Marginal means for attentive women
df_women_att <- df_long_att %>% 
  dplyr::filter(sex=="Woman") %>% filter(dominant_exposure==0)

mm_women_att <- cregg::mm(df_women_att, # data
                          choice~position + remuneration + workload + work_environment, # formula
                          id = id)

mm_women_att$sex <- "Women - Attentive"

### Marginal means for men in the whole sample
df_men <- df_long %>% 
  dplyr::filter(sex=="Man")

mm_men <- cregg::mm(df_men, # data
                    choice~position + remuneration + workload + work_environment, # formula
                    id = id)

mm_men$sex <- "Men - All choices (main)"

### Marginal means for women in the whole sample
df_women <- df_long %>% 
  dplyr::filter(sex=="Woman")

mm_women <- cregg::mm(df_women, # data
                      choice~position + remuneration + workload + work_environment, # formula
                      id = id)

mm_women$sex <- "Women - All choices (main)"


#combine estimates from the two subsets, rename to plurals and rearrange levels of feature factor
mm_sex_subsets_att <- bind_rows(mm_men, mm_women, mm_men_att, mm_women_att) %>%
  mutate(sex = factor(sex, levels = c("Women - Attentive",
                                      "Men - Attentive",
                                      "Women - All choices (main)",
                                      "Men - All choices (main)"))) %>% 
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment")))

mm_attentive_plot <- mm_sex_subsets_att %>% 
  ggplot(data=., aes(y = level, x = estimate, color = sex, shape = sex, linetype = sex)) +
  geom_point(aes(shape = sex),
             position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=estimate-std.error*1.39,
                     xmax=estimate+std.error*1.39),
                 position = position_dodge2(width = 0.8)) +
  xlab("Marginal mean") +
  ylab("") +
  scale_x_continuous(labels = seq(0.3,0.8,0.05), breaks = seq(0.3,0.8,0.05)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  coord_cartesian(xlim = c(0.3,0.77)) +
  scale_color_manual("", values = c("gray", "black", "gray", "black")) +
  scale_shape_manual("", values=c(2,1,17,16)) + #(16,17,1,2)
  scale_linetype_manual("", values = c(2,2,1,1)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE)) 

mm_attentive_plot


## estimate differences in marginal means
# whole sample
mm_sex_differences <- cregg::mm_diffs(df_long, # data
                                      choice~position + remuneration + workload + work_environment, # formula
                                      ~sex)

mm_sex_differences$att <- "All choices"

# attentive
df_long_att <- df_long_att %>% filter(dominant_exposure==0)

mm_sex_differences_att <- cregg::mm_diffs(df_long_att, # data
                                          choice~position + remuneration + workload + work_environment, # formula
                                          ~sex)

mm_sex_differences_att$att <- "Attentive"

mm_differences_att <- bind_rows(mm_sex_differences,mm_sex_differences_att) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment"))) %>%
  mutate(att = factor(att, levels = c("Attentive","All choices")))

mm_attentive_differences_plot <- 
  mm_differences_att %>% 
  ggplot(data=., aes(y = level, x = estimate, shape = att, linetype = att)) +
  geom_point(color = "black", position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=lower,
                     xmax=upper),
                 color = "black", position = position_dodge2(width = 0.8)) +
  xlab("Differences\n(women - men)") +
  ylab("") +
  #ggtitle("Differences in marginal between women and men") +
  scale_x_continuous(labels = seq(-0.2,0.2,0.05), breaks = seq(-0.2,0.2,0.05)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_linetype_manual("", values = c(2,1)) +
  scale_shape_manual("", values = c(1,16)) +
  #scale_color_viridis_d(option = "viridis", "") +
  #scale_color_grey("", start = 0.1, end = 0.65) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  coord_cartesian(xlim = c(-0.1,0.1)) +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE)) 

mm_attentive_plot + mm_attentive_differences_plot + plot_layout(widths = c(2, 1))

######################
### VICTIMS ##########
######################

df_victims <- df_long %>% 
  dplyr::filter(victim=="Victim")

mm_victims <- cregg::mm(df_victims, # data
                        choice~position + remuneration + workload + work_environment, # formula
                        id = id)

mm_victims$victim <- "Victim - All choices (main)"

df_nonvictims <- df_long %>% 
  dplyr::filter(victim=="Non-victim")

mm_nonvictims <- cregg::mm(df_nonvictims, # data
                           choice~position + remuneration + workload + work_environment, # formula
                           id = id)

mm_nonvictims$victim <- "Non-victim - All choices (main)"



### remove inattentive
df_victims_att <- df_long_att %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(victim=="Victim")


mm_victims_att <- cregg::mm(df_victims_att, # data
                            choice~position + remuneration + workload + work_environment, # formula
                            id = id)

mm_victims_att$victim <- "Victim - Attentive"

df_nonvictims_att <- df_long_att %>% 
  dplyr::filter(dominant_exposure==0) %>%
  dplyr::filter(victim=="Non-victim")

mm_nonvictims_att <- cregg::mm(df_nonvictims_att, # data
                               choice~position + remuneration + workload + work_environment, # formula
                               id = id)

mm_nonvictims_att$victim <- "Non-victim - Attentive"

#combine estimates from the two subsets, add attribute variable for color and rearrange order of levels
mm_victim_subsets_att <- bind_rows(mm_victims, mm_nonvictims, mm_victims_att, mm_nonvictims_att) %>% 
  mutate(victim = factor(victim, levels = c("Victim - Attentive",
                                            "Non-victim - Attentive",
                                            "Victim - All choices (main)",
                                            "Non-victim - All choices (main)"))) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment")))


# create plot for only work environment
mm_attentive_plot_victim <- mm_victim_subsets_att %>% 
  filter(feature=="Working Environment") %>% 
  ggplot(data=., aes(y = level, x = estimate, color = victim, shape = victim, linetype = victim)) +
  geom_point(position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=estimate-std.error*1.39,
                     xmax=estimate+std.error*1.39),
                 position = position_dodge2(width = 0.8)) +
  xlab("Marginal mean") +
  ylab("") +
  scale_x_continuous(labels = seq(0.3,0.8,0.05), breaks = seq(0.3,0.8,0.05)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  coord_cartesian(xlim = c(0.35,0.73)) +
  scale_color_manual("", values = c("gray", "black", "gray", "black")) +
  scale_shape_manual("", values=c(2,1,17,16)) + #(16,17,1,2)
  scale_linetype_manual("", values = c(2,2,1,1)) +
  scale_alpha_manual("", values = c(0.5,1)) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=2, byrow = TRUE, reverse = TRUE)) 

mm_attentive_plot_victim



## estimate differences in marginal means
# whole sample
mm_victim_differences <- cregg::mm_diffs(df_long, # data
                                         choice~position + remuneration + workload + work_environment, # formula
                                         ~victim)

mm_victim_differences$att <- "All choices"

# attentive
df_long_att <- df_long_att %>% filter(dominant_exposure==0)

mm_victim_differences_att <- cregg::mm_diffs(df_long_att, # data
                                             choice~position + remuneration + workload + work_environment, # formula
                                             ~victim)

mm_victim_differences_att$att <- "Attentive"

mm_differences_victim_att <- bind_rows(mm_victim_differences,mm_victim_differences_att) %>%
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment"))) %>%
  mutate(att = factor(att, levels = c("Attentive","All choices")))

mm_attentive_plot_victim_dif <- mm_differences_att %>% 
  filter(feature=="Working Environment") %>% 
  ggplot(data=., aes(y = level, x = estimate, shape = att, linetype = att)) +
  geom_point(color = "black", position = position_dodge2(width = 0.8)) +
  geom_linerange(aes(xmin=lower,
                     xmax=upper),
                 color = "black", position = position_dodge2(width = 0.8)) +
  xlab("Differences\n(victims - non-victims)") +
  ylab("") +
  #ggtitle("Differences in marginal between women and men") +
  scale_x_continuous(labels = seq(-0.2,0.2,0.05), breaks = seq(-0.2,0.2,0.05)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  scale_linetype_manual("", values = c(2,1)) +
  scale_shape_manual("", values = c(1,16)) +
  #scale_color_viridis_d(option = "viridis", "") +
  #scale_color_grey("", start = 0.1, end = 0.65) +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  coord_cartesian(xlim = c(-0.1,0.1)) +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(), axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  guides(color=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         shape=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE),
         linetype=guide_legend(ncol=1, byrow = TRUE, reverse = TRUE)) 

mm_attentive_plot_victim + mm_attentive_plot_victim_dif + plot_layout(widths = c(2, 1))

### FIGURE J1 -- one big plot!

mm_attentive_plot + mm_attentive_differences_plot + mm_attentive_plot_victim + mm_attentive_plot_victim_dif + plot_layout(widths = c(3, 1), heights = c(4.5,1))

ggsave("figureJ1.pdf", height = 11.25, width = 10)

